/*
 * Copyright (c) 2013, Intel Corporation
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *   * Neither the name of Intel Corporation nor the names of its contributors
 *     may be used to endorse or promote products derived from this software
 *     without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
 
/*
 * This application generates LMA (Local Memory Access) and RMA
 * (Remote Memory Access) traffic and reports latency information on a
 * NUMA system.
 *
 * Please note that the latencies reported by mgen are not official data
 * from Intel/IBM; mgen is just a tool for testing numatop.
 */

#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sched.h>
#include <pthread.h>
#include <sys/mman.h>
#include <sys/shm.h>
#include <errno.h>
#include <limits.h>
#include <stdarg.h>
#include <libgen.h>
#include <inttypes.h>
#include <string.h>
#include <signal.h>
#include <strings.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/resource.h>
#include <numa.h>
#include "../../common/include/util.h"
#include "../../common/include/os/os_util.h"
#include "./include/util.h"

/*
 * Clock calibration results; both are filled in by os_calibrate() in main().
 * NOTE(review): the names suggest "nanoseconds per clock" and "clocks per
 * second" -- confirm against os_calibrate().
 */
double s_nsofclk;
uint64_t s_clkofsec;
/*
 * Value printed as "Average" by the signal handler. It is not written by
 * any code in this file; presumably updated by the measurement code.
 */
double s_latest_avglat = 0.0;
/* Wall-clock time captured with gettimeofday() in main() before measuring. */
struct timeval s_tvbase;

/* Index table built by rand_array_init() and consumed by rand_buf_init(). */
static int s_rand_arr[RAND_ARRAY_SIZE];
/* Result of sysconf(_SC_NPROCESSORS_CONF), stored by main(). */
static int s_ncpus;
/* Buffer returned by buf_create(); NULL until that call succeeds. */
static void *s_buf = NULL;
/* Seed handed to srand(); 0 makes rand_array_init() use time(0) instead. */
static unsigned int s_randseed;

/* Forward declarations for helpers that are defined after their first use. */
static void *buf_create(int);
static void buf_release(void *);
static int dependent_read(void *, int, int, int);

/*
 * Print the command-line help text to stdout.
 *
 * exec_name is the path the program was started with (normally argv[0]);
 * only its last path component is shown. A NULL exec_name falls back to
 * the literal name "mgen".
 */
static void
print_usage(const char *exec_name)
{
	char buffer[PATH_MAX];
	const char *prog;

	/*
	 * basename() is allowed to modify its argument and to return a
	 * pointer to static storage, so operate on a private copy and
	 * resolve the name exactly once. snprintf() always NUL-terminates,
	 * truncating over-long paths to PATH_MAX - 1 bytes.
	 */
	(void) snprintf(buffer, sizeof (buffer), "%s",
	    (exec_name != NULL) ? exec_name : "mgen");
	prog = basename(buffer);

	printf("Usage: %s [option(s)]\n", prog);
	printf("Options:\n"
	    "    -h: print helps\n"
	    "    -a: the node where the memory is allocated on\n"
	    "    -c: the cpu where creates a thread to access memory.\n"
	    "    -t: the seconds for measuring.\n"
	    "    -s: the random seed to build random address array (just for reproducing).\n");
	printf("\nFor example:\n"
	    "    1. Generate LMA for 10s (memory allocated on node1, thread runs on cpu1):\n"
	    "       %s -a 1 -c 1 -t 10\n"
	    "    2. Generate RMA for 10s (memory allocated on node0, thread runs on cpu10):\n"
	    "       %s -a 0 -c 10 -t 10\n",
	    prog, prog);
}

/*
 * Common handler for the termination signals installed by main()
 * (SIGINT, SIGHUP, SIGQUIT, SIGPIPE, SIGTERM).
 *
 * Prints the termination banner and the latest average latency (0.0 when
 * no positive average is available), releases the buffer if one is held,
 * and exits with status 0. It never returns.
 *
 * NOTE(review): printf() and exit() are not async-signal-safe; this keeps
 * the existing behaviour of reporting from inside the handler.
 */
static void
sigint_handler(int sig)
{
	switch (sig) {
	case SIGINT:
	case SIGHUP:
	case SIGQUIT:
	case SIGPIPE:
	case SIGTERM:
		/*
		 * Re-arm the handler for the signal that was actually
		 * delivered, so a repeat of it during shutdown still lands
		 * here on systems where signal() resets the disposition.
		 */
		(void) signal(sig, sigint_handler);
		break;

	default:
		break;
	}

	printf("-------------------------\n");
	printf("%24s\n", "*** Terminated! ***");
	printf("%9s  %13.1f\n\n", "Average",
	    (s_latest_avglat > 0.0) ? s_latest_avglat : 0.0);

	if (s_buf != NULL) {
		buf_release(s_buf);
		/* Drop the stale pointer so it cannot be released twice. */
		s_buf = NULL;
	}

	exit (0);
}

/*
 * Parse a complete decimal integer from str and check it against [lo, hi].
 *
 * Returns 0 and stores the value in *out on success. Returns -1, leaving
 * *out untouched, when str is empty, has trailing characters, overflows,
 * or lies outside the requested range.
 */
static int
parse_ranged_arg(const char *str, long long lo, long long hi, long long *out)
{
	char *end = NULL;
	long long val;

	errno = 0;
	val = strtoll(str, &end, 10);
	if ((end == str) || (*end != '\0') || (errno != 0) ||
	    (val < lo) || (val > hi)) {
		return (-1);
	}

	*out = val;
	return (0);
}

/*
 * Entry point: parse the options, install the termination handlers,
 * calibrate the clock, allocate the buffer on the requested node and run
 * the dependent-read measurement from the requested cpu.
 *
 * Options -a (node) and -c (cpu) are mandatory; -t (seconds) defaults to
 * MEAS_TIME_DEFAULT and -s (seed) defaults to 0. All numeric arguments
 * must be non-negative decimal integers.
 *
 * Returns 0 on success or after -h, and -1 on any error.
 */
int
main(int argc, char *argv[])
{
	int node_alloc = -1, cpu_consumer = -1;
	int meas_sec = MEAS_TIME_DEFAULT;
	int ret = -1;
	int c;
	long long val;

	s_randseed = 0;
	optind = 1;
	opterr = 0;

	/*
	 * The leading ':' makes getopt() return ':' for a missing option
	 * argument; without it such errors are reported as '?' and the
	 * "Missed argument" branch below can never run.
	 */
	while ((c = getopt(argc, argv, ":a:c:hf:t:s:")) != -1) {
		switch (c) {
		case 'h':
			print_usage(argv[0]);
			ret = 0;
			goto L_EXIT0;

		case 'a':
			if (parse_ranged_arg(optarg, 0, INT_MAX, &val) != 0) {
				goto L_BADARG;
			}
			node_alloc = (int)val;
			break;

		case 'c':
			if (parse_ranged_arg(optarg, 0, INT_MAX, &val) != 0) {
				goto L_BADARG;
			}
			cpu_consumer = (int)val;
			break;

		case 't':
			if (parse_ranged_arg(optarg, 0, INT_MAX, &val) != 0) {
				goto L_BADARG;
			}
			meas_sec = (int)val;
			break;

		case 's':
			/* The seed is unsigned, so allow the full range. */
			if (parse_ranged_arg(optarg, 0, UINT_MAX, &val) != 0) {
				goto L_BADARG;
			}
			s_randseed = (unsigned int)val;
			break;

		case 'f':
			/*
			 * Still accepted so existing command lines keep
			 * working; the value is not used anywhere in this
			 * file. NOTE(review): presumably a legacy option.
			 */
			break;

		case ':':
			printf("Missed argument for option %c.\n",
			    optopt);
			print_usage(argv[0]);
			goto L_EXIT0;

		case '?':
			printf("Unrecognized option %c.\n", optopt);
			print_usage(argv[0]);
			goto L_EXIT0;

		default:
			break;
		}
	}

	s_ncpus = (int)sysconf(_SC_NPROCESSORS_CONF);

	if (node_alloc == -1) {
		printf("Missed argument for option '-a'.\n");
		print_usage(argv[0]);
		goto L_EXIT0;
	}

	if (cpu_consumer == -1) {
		printf("Missed argument for option '-c'.\n");
		print_usage(argv[0]);
		goto L_EXIT0;
	}

	if ((signal(SIGINT, sigint_handler) == SIG_ERR) ||
	    (signal(SIGHUP, sigint_handler) == SIG_ERR) ||
	    (signal(SIGQUIT, sigint_handler) == SIG_ERR) ||
	    (signal(SIGTERM, sigint_handler) == SIG_ERR) ||
	    (signal(SIGPIPE, sigint_handler) == SIG_ERR)) {
		printf("Failed to install signal handlers.\n");
		goto L_EXIT0;
	}

	gettimeofday(&s_tvbase, 0);
	os_calibrate(&s_nsofclk, &s_clkofsec);

	if ((s_buf = buf_create(node_alloc)) == NULL) {
		printf("Failed to create buffer.\n");
		goto L_EXIT0;
	}

	if (dependent_read(s_buf, cpu_consumer, node_alloc, meas_sec) != 0) {
		printf("Failed to dependent read.\n");
		goto L_EXIT0;
	}

	ret = 0;
	goto L_EXIT0;

L_BADARG:
	/* c and optarg still describe the option that failed to parse. */
	printf("Invalid argument '%s' for option %c.\n", optarg, c);
	print_usage(argv[0]);

L_EXIT0:
	if (s_buf != NULL) {
		buf_release(s_buf);
		/* Keep the signal handler from releasing it a second time. */
		s_buf = NULL;
	}

	return (ret);
}

/*
 * Report whether exactly one slot of s_rand_arr is still unassigned.
 *
 * Returns 1 when precisely one element equals INVALID_RAND, and 0 when
 * there are none or more than one.
 */
static int
last_free_elem(void)
{
	int idx;
	int nfree = 0;

	for (idx = 0; idx < RAND_ARRAY_SIZE; idx++) {
		if (s_rand_arr[idx] != INVALID_RAND) {
			continue;
		}

		/* A second free slot settles the answer; stop scanning. */
		if (++nfree == 2) {
			return (0);
		}
	}

	return ((nfree == 1) ? 1 : 0);
}

/*
 * Fill s_rand_arr with a randomly ordered chain of indices.
 *
 * Starting at slot 0, each slot receives the index of a randomly chosen
 * slot that is still unassigned, and the walk continues from that slot.
 * Once last_free_elem() reports a single unassigned slot, the current
 * slot is given the value RAND_ARRAY_SIZE and the function returns.
 *
 * A zero s_randseed is replaced by time(0) before seeding rand().
 */
static void
rand_array_init(void)
{
	int slot, pick;
	int cur = 0;

	if (s_randseed == 0) {
		s_randseed = time(0);
	}

	srand(s_randseed);

	for (slot = 0; slot < RAND_ARRAY_SIZE; slot++) {
		s_rand_arr[slot] = INVALID_RAND;
	}

	for (;;) {
		/* Draw until the candidate is a slot nobody has claimed. */
		do {
			pick = rand() % RAND_ARRAY_SIZE;
		} while (s_rand_arr[pick] != INVALID_RAND);

		/* A slot may not link to itself; such a draw is discarded. */
		if ((pick != cur) && (s_rand_arr[cur] == INVALID_RAND)) {
			s_rand_arr[cur] = pick;
			cur = pick;
		}

		if (last_free_elem()) {
			s_rand_arr[cur] = RAND_ARRAY_SIZE;
			break;
		}
	}
}

/*
 * Lay a pointer chain over buf according to s_rand_arr.
 *
 * buf is treated as size / (RAND_ARRAY_SIZE * BUF_ELE_SIZE) consecutive
 * blocks of RAND_ARRAY_SIZE elements, BUF_ELE_SIZE bytes apart. The first
 * word of element j in a block is set to the address of element
 * s_rand_arr[j] of the same block. An element whose table entry is
 * RAND_ARRAY_SIZE therefore points just past its block; the last such
 * element written is finally redirected to the start of buf.
 */
static void
rand_buf_init(void *buf, int size)
{
	const int blk_cnt = size / (RAND_ARRAY_SIZE * BUF_ELE_SIZE);
	char *cursor = (char *)buf;
	uint64_t **wrap_slot = NULL;
	int blk, ele;

	for (blk = 0; blk < blk_cnt; blk++) {
		char *blk_base = cursor;

		for (ele = 0; ele < RAND_ARRAY_SIZE; ele++) {
			uint64_t **link = (uint64_t **)cursor;
			int next = s_rand_arr[ele];

			/* Remember the most recent end-of-chain element. */
			if (next == RAND_ARRAY_SIZE) {
				wrap_slot = link;
			}

			*link = (uint64_t *)(blk_base + (next * BUF_ELE_SIZE));
			cursor += BUF_ELE_SIZE;
		}
	}

	/* Point the final end-of-chain element back at the buffer start. */
	if (wrap_slot != NULL) {
		*wrap_slot = (uint64_t *)buf;
	}
}

/*
 * Initialize a buffer of `size` bytes for the measurement: rebuild the
 * index table with rand_array_init(), then write the pointer chain into
 * buf with rand_buf_init().
 */
static void
buf_init(void *buf, int size)
{
	/* rand_buf_init() reads s_rand_arr, so the table is built first. */
	rand_array_init();
	rand_buf_init(buf, size);
}

/*
 * Allocate BUF_SIZE bytes on NUMA node node_alloc and initialize the
 * buffer with buf_init().
 *
 * Returns the buffer, or NULL when the NUMA API is unavailable, the node
 * number is outside 0..numa_max_node(), or the allocation fails. A
 * returned buffer is released with buf_release().
 */
static void *
buf_create(int node_alloc)
{
	void *buf;

	/*
	 * libnuma requires numa_available() to succeed before any other
	 * library function is used; their behaviour is undefined otherwise.
	 */
	if (numa_available() < 0) {
		printf("NUMA is not available on this system.\n");
		return (NULL);
	}

	/* Reject nodes libnuma does not know instead of passing them on. */
	if ((node_alloc < 0) || (node_alloc > numa_max_node())) {
		printf("Invalid node %d (valid range is 0-%d).\n",
		    node_alloc, numa_max_node());
		return (NULL);
	}

	buf = numa_alloc_onnode(BUF_SIZE, node_alloc);
	if (buf != NULL) {
		buf_init(buf, BUF_SIZE);
	}

	return (buf);
}

/*
 * Release a buffer obtained from buf_create(). The length passed to
 * numa_free() is BUF_SIZE, the same size buf_create() allocates.
 */
static void
buf_release(void *buf)
{
	numa_free(buf, BUF_SIZE);	
}

/*
 * Run the measurement from cpu_consumer against buf.
 *
 * Calls processor_bind(cpu_consumer) first (presumably pinning the caller
 * to that cpu -- confirm against its definition), prints the disclaimer,
 * the run parameters and the random seed to stdout, then hands buf and
 * meas_sec to arch__dependent_read().
 *
 * node_alloc is used only in the printed message.
 *
 * Returns 0 after the measurement call returns, or -1 without printing
 * anything when processor_bind() reports failure.
 */
static int
dependent_read(void *buf, int cpu_consumer, int node_alloc, int meas_sec)
{
	int bind_rc = processor_bind(cpu_consumer);

	if (bind_rc != 0) {
		return (-1);
	}

	printf("\n!!! The reported latency is not the official data\n");
	printf("    from " CORP ", it's just a tool to test numatop !!!\n");

	printf("\nGenerating memory access from cpu%d to node%d for ~%ds ...\n",
	    cpu_consumer, node_alloc, meas_sec);

	printf("(random seed to build random address array is %u.)\n",
	    s_randseed);

	arch__dependent_read(buf, meas_sec);

	return (0);
}
